Load libraries
library(STutility)
library(ggplot2)
library(ggpubr)
library(dplyr)
library(magrittr)
Assemble spaceranger output files and merge curated meta data
# Mouse brain
samples <- Sys.glob(paths = "../../spaceranger_output/lung/*/filtered_feature_bc_matrix.h5")
imgs <- Sys.glob(paths = "../../spaceranger_output/lung/*/spatial/tissue_hires_image.png")
spotfiles <- Sys.glob(paths = "../../spaceranger_output/lung/*/spatial/tissue_positions_list.csv")
json <- Sys.glob(paths = "../../spaceranger_output/lung/*/spatial/scalefactors_json.json")
infoTable <- data.frame(samples, imgs, spotfiles, json)
infoTable <- cbind(infoTable, arrayID = do.call(rbind, strsplit(infoTable$samples, "/"))[, 5])
curated_metadata <- openxlsx::read.xlsx("../../sheets/curated_RNA_rescue_sample_metadata.xlsx", sheet = 3)
curated_metadata <- setNames(curated_metadata, nm = c("storage_time", "seq_date", "include", "RNA_rescue",
"project", "experimenter", "processer", "comments",
"ID", "paper_id", "RIN", "DV200", "protocol", "source", "arrayID", "spots_under_tissue",
"genes_detected", "fraction_spots_under_tissue",
"median_genes_per_spot", "median_UMIs_per_spot",
"saturation", "reads_mapped_to_probe_set",
"reads_mapped_confidently_to_probe_set",
"reads_mapped_confidently_to_filtered_probe_set",
"reads_mapped_to_genome",
"reads_mapped_confidently_to_genome",
"number_of_panel_genes"))
infoTable <- merge(infoTable, curated_metadata, by = "arrayID")
Load data into a Seurat object
LNG <- InputFromTable(infoTable[c(1, 3:4, 2, 5:6), ])
## Using spotfiles to remove spots outside of tissue
## Loading ../../spaceranger_output/lung/V10B01-037_A1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../../spaceranger_output/lung/V10T03-286_B1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../../spaceranger_output/lung/V10T03-286_D1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../../spaceranger_output/lung/V10B01-037_B1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../../spaceranger_output/lung/V11A20-384_C1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
## Loading ../../spaceranger_output/lung/V11A20-384_D1/filtered_feature_bc_matrix.h5 count matrix from a 'Visium' experiment
##
## ------------- Filtering (not including images based filtering) --------------
## Spots removed: 0
## Genes removed: 12771
## Saving capture area ranges to Staffli object
## After filtering the dimensions of the experiment is: [23830 genes, 11484 spots]
Load images
LNG <- LoadImages(LNG, time.resolve = FALSE, xdim = 1e3)
## Loading images for 6 samples:
## Reading ../../spaceranger_output/lung/V10B01-037_A1/spatial/tissue_hires_image.png for sample 1 ...
## Scaling down sample 1 image from 1979x2000 pixels to 1000x1011 pixels
## Reading ../../spaceranger_output/lung/V10T03-286_B1/spatial/tissue_hires_image.png for sample 1 ...
## Scaling down sample 2 image from 2000x1916 pixels to 1000x958 pixels
## Reading ../../spaceranger_output/lung/V10T03-286_D1/spatial/tissue_hires_image.png for sample 1 ...
## Scaling down sample 3 image from 2000x1916 pixels to 1000x958 pixels
## Reading ../../spaceranger_output/lung/V10B01-037_B1/spatial/tissue_hires_image.png for sample 1 ...
## Scaling down sample 4 image from 1979x2000 pixels to 1000x1011 pixels
## Reading ../../spaceranger_output/lung/V11A20-384_C1/spatial/tissue_hires_image.png for sample 1 ...
## Scaling down sample 5 image from 1936x2000 pixels to 1000x1033 pixels
## Reading ../../spaceranger_output/lung/V11A20-384_D1/spatial/tissue_hires_image.png for sample 1 ...
## Scaling down sample 6 image from 1979x2000 pixels to 1000x1011 pixels
Apply rigid transformations, e.g. rotations to get a rough alignment of the H&E images
# Warp transform
LNG <- WarpImages(LNG, verbose = TRUE, transforms = list( "2" = list(angle = -90),
"3" = list(angle = -90),
"5" = list(angle = 180)))
## Creating dummy masks ...Loading masked image for sample 2 ...
## Warping pixel coordinates for 2 ...
## Warping image for 2 ...
## Warping image mask for 2 ...
## Finished alignment for sample2
##
## Loading masked image for sample 3 ...
## Warping pixel coordinates for 3 ...
## Warping image for 3 ...
## Warping image mask for 3 ...
## Finished alignment for sample3
##
## Loading masked image for sample 5 ...
## Warping pixel coordinates for 5 ...
## Warping image for 5 ...
## Warping image mask for 5 ...
## Finished alignment for sample5
Add a new metadata column with a combined protocol and lung ID label
LNG$paper_id <- LNG[[]] %>%
mutate(paper_id = ifelse(paper_id == "1.0", "LNG1", "LNG2")) %>%
pull(paper_id)
LNG$protocol_lng <- gsub(pattern = " ", replacement = "_", paste0(LNG$protocol, "_ID: ", LNG$paper_id))
ST.FeaturePlot(LNG, features = "nFeature_RNA", ncol = 3, label.by = "protocol_lng")
## Loading required namespace: viridis
The standard Visium data sets have reduced numbers of unique genes compared to the data generated by RRST.
VlnPlot(LNG, features = "nFeature_RNA", group.by = "arrayID", split.by = "protocol")
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##
## This message will be shown once per session.
At a first glance, we can see that the distribution of unique genes is quite different across the two protocols. For RRST, we have much more even coverage across the tissue, whereas in the standard Visium data, we only get high counts in the epithelium.
A reasonable threshold for filtering would be somewhere between 300-1000 unique genes, depending on the tissue type. If parts of the tissue is cell sparse, which is the case for the alveoli, it might be more reasonable to set a lower threshold.
If we use a threshold of 300 unique genes, we can see that ~1.3%-2.3% of spots are discarded from the RR-seq data, whereas ~21%-80% of spots are discarded from the standard Visium data.
LNG[[]] %>%
dplyr::mutate(filter = ifelse(nFeature_RNA < 300, "discard", "keep")) %>%
dplyr::group_by(protocol, paper_id, filter) %>%
dplyr::summarize(Freq = n()) %>%
dplyr::group_by(protocol, paper_id) %>%
dplyr::mutate(percentage_discarded = round(Freq/sum(Freq)*100, digits = 2)) %>%
dplyr::filter(filter == "discard") %>%
dplyr::select(-Freq, -filter) %>%
dplyr::arrange(paper_id)
## `summarise()` has grouped output by 'protocol', 'paper_id'. You can override
## using the `.groups` argument.
## # A tibble: 4 × 3
## # Groups: protocol, paper_id [4]
## protocol paper_id percentage_discarded
## <chr> <chr> <dbl>
## 1 RNA rescue LNG1 1.3
## 2 standard LNG1 21.4
## 3 RNA rescue LNG2 2.29
## 4 standard LNG2 79.8
p <- LNG[[]] %>% ggplot(aes(nFeature_RNA, fill = ifelse(nFeature_RNA < 300, "discard", "keep"))) +
geom_histogram(binwidth = 100) +
geom_vline(xintercept = 300, linetype = "longdash") +
facet_grid(protocol~ID, scales = "free",
labeller = labeller(protocol = c("RNA rescue" = "RRST", "standard" = "standard"),
ID = c("P583-B-LNG3" = "LNG1", "P640-C-LNG5" = "LNG2"))) +
labs(fill = "", x = "Unique genes", y = "Count") +
scale_x_continuous(breaks = c(0, 300, 2000, 4000, 6000), minor_breaks = NULL) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 60, hjust = 1),
axis.text = element_text(size = 10, colour = "black"),
axis.title = element_text(size = 14, colour = "black"),
strip.text = element_text(size = 14, colour = "black", face = "bold"),
legend.text = element_text(size = 10, colour = "black"))
p
pdf(file = "../Suppl_figures/Suppl_Figure_filter_lung_data/Suppl_Figure_2.pdf", width = 10, height = 7)
print(p)
dev.off()
png(filename = "../Suppl_figures/Suppl_Figure_filter_lung_data/Suppl_Figure_2.png", width = 2400, height = 1600, res = 300)
print(p)
dev.off()
LNG$filter <- ifelse(LNG$nFeature_RNA < 300, "discard", "keep")
LNG.subset <- SubsetSTData(LNG, expression = nFeature_RNA > 300)
Now we can see that quite a few spots have been discarded, in particular for the standard Visium data for LNG2.
ST.FeaturePlot(LNG.subset, features = "nFeature_RNA", ncol = 3, label.by = "protocol_lng") &
theme(panel.background = element_rect(fill = "lightgrey"))
Split data by protocol and sample
LNGList <- lapply(unique(LNG.subset$protocol_lng), function(i) {
SubsetSTData(LNG.subset, expression = protocol_lng == i)
})
LNGList <- lapply(LNGList, function(LNG.subset) {
LNG.subset <- LNG.subset %>%
NormalizeData() %>%
ScaleData() %>%
FindVariableFeatures() %>%
RunPCA() %>%
FindNeighbors(reduction = "pca", dims = 1:20) %>%
FindClusters(resolution = 0.8) %>%
RunUMAP(reduction = "pca", dims = 1:20, min.dist = 0.3, n.epochs = 1e3)
})
LNGList <- setNames(LNGList, nm = unique(LNG$protocol_lng))
Plot clusters in UMAP embedding
DimPlot(LNGList[[1]]) & ggtitle("LNG1 - RRST") | DimPlot(LNGList[[2]]) & ggtitle("LNG1 - standard")
DimPlot(LNGList[[3]]) & ggtitle("LNG2 - RRST") | DimPlot(LNGList[[4]]) & ggtitle("LNG2 - standard")
Run DEA with an avg_log2FC threshold of 0.5 and adjusted p-value < 0.01.
# Keep lower threshold avg_log2FC for checking DE genes in all clusters
de.markers.list.025 <- lapply(LNGList, function(se) {
se <- SetIdent(se, value = "seurat_clusters")
FindAllMarkers(se, only.pos = TRUE, logfc.threshold = 0.25)
})
de.markers.list.025 <- lapply(de.markers.list.025, function(de.markers) {
de.markers %>% dplyr::filter(p_val_adj < 0.01)
})
# avg_log2FC > 0.5
# These are the DE genes used for figure plots
de.markers.list <- lapply(de.markers.list.025, function(de.markers) {
de.markers %>% dplyr::filter(avg_log2FC > 0.5)
})
# RRST cluster annotations
ann1 <- c("0" = "AT1-enriched", "1" = "Macrophage-enriched",
"2" = "AT2-enriched", "3" = "Connective tissue 1",
"4" = "Connective tissue 2", "5" = "Connective tissue 3",
"6" = "Airway epithelium", "7" = "Megakaryocyte-enriched",
"8" = "Smooth muscle", "9" = "Immune-enriched",
"10" = "Glands")
ann3 <- c("0" = "AT2-enriched", "1" = "Low-quality cluster 1",
"2" = "Connective tissue 1", "3" = "Connective tissue 2",
"4" = "Low-quality cluster 2", "5" = "Connective tissue 3",
"6" = "Glands", "7" = "Megakaryocyte-enriched",
"8" = "Airway epithelium")
# Violin plots showing the number of unique genes and UMIs
plots_vln <- lapply(1:4, function(i) {
p <- LNGList[[i]][[]] %>%
dplyr::mutate(`Unique genes` = nFeature_RNA, `UMI counts` = nCount_RNA) %>%
reshape2::melt(measure.vars = c("Unique genes", "UMI counts")) %>%
ggplot(aes(paper_id, value, fill = variable)) +
geom_violin(scale = "width", alpha = 0.7) +
geom_boxplot(width = 0.3) +
theme_classic() +
facet_wrap(~variable, scales = "free") +
theme(axis.title = element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank(),
legend.position = "none", axis.text.y = element_text(size = 18, colour = "black"),
strip.text = element_text(size = 18))
return(p)
})
# Spatial plots showing the spots discarded
plots_filter <- lapply(c("RNA_rescue_ID:_LNG1", "standard_ID:_LNG1", "RNA_rescue_ID:_LNG1", "standard_ID:_LNG1"), function(i) {
p <- ST.FeaturePlot(SubsetSTData(LNG, expression = protocol_lng %in% i), features = "filter", show.sb = FALSE, pt.size = 1.3,
cols = c("keep" = "lightgray", "discard" = "red"), indices = ifelse(i %in% c("standard_ID:_1.0", "standard_ID:_2.0"), 2, 1)) &
theme(strip.text = element_blank(), plot.title = element_blank(), legend.title = element_blank(), legend.text = element_text(size = 16)) &
guides(fill = guide_legend(override.aes = list(size = 7)))
p <- ggrastr::rasterize(p, layers = "Point", dpi = 300)
return(p)
})
# UMAP plots showing clusters
plots_umap <- lapply(1:4, function(i) {
gg <- cbind(LNGList[[i]][[]], LNGList[[i]]@reductions$umap@cell.embeddings)
p <- ggplot() +
geom_point(data = gg, aes(UMAP_1, UMAP_2, color = seurat_clusters)) +
ggrepel::geom_label_repel(size = 9, label.size = 0.2, data = gg %>%
dplyr::group_by(seurat_clusters) %>%
summarize(med1 = median(UMAP_1), med2 = median(UMAP_2)), aes(med1, med2 + 0.6, fill = seurat_clusters, label = seurat_clusters)) +
theme_classic() +
theme(plot.title = element_blank(),
legend.position = "none", axis.text = element_text(size = 14, colour = "black"), axis.title = element_text(size = 18))
p <- ggrastr::rasterize(p, layers = "Point", dpi = 300)
return(p)
})
# Spatial plots showing the number of unique genes
plots_qc <- lapply(1:4, function(i) {
p <- ST.FeaturePlot(LNGList[[i]], features = "nFeature_RNA", show.sb = FALSE, pt.size = 1.3,
indices = ifelse(i %in% c(2, 4), 2, 1), cols = viridis::magma(n = 11, direction = -1)) &
theme(plot.subtitle = element_blank(),
plot.title = element_blank(),
strip.text = element_blank(),
legend.text = element_text(size = 14),
legend.title = element_text(size = 18)) &
labs(fill = "Unique\ngenes")
p <- ggrastr::rasterize(p, layers = "Point", dpi = 300)
return(p)
})
# HE images
plots_HE <- lapply(1:4, function(i) {
im <- GetStaffli(LNGList[[i]])@rasterlists$processed[[ifelse(i %in% c(2, 4), 2, 1)]]
im[, 1:100] <- "#ffffffff"; im[, (ncol(im) - 120):ncol(im)] <- "#ffffffff"
im <- im[101:(nrow(im) - 100), ]
g <- grid::rasterGrob(im, width = unit(1, "npc"), height = unit(1, "npc"), interpolate = TRUE)
ggplot() +
annotation_custom(g, -Inf, Inf, -Inf, Inf) +
theme_void() +
theme(plot.margin = margin(t = 0, r = 40, b = 0, l = 40))
})
# Spatial plots showing clusters split by clusters
plots_spatial <- lapply(1:4, function(i) {
p <- ST.FeaturePlot(LNGList[[i]], features = "seurat_clusters", split.labels = TRUE, ncol = ifelse(i %in% c(1, 2), 4, 3),
indices = ifelse(i %in% c(2, 4), 2, 1), show.sb = FALSE, pt.size = 0.5) +
theme(plot.title = element_blank(), legend.position = "none", strip.text = element_blank()) &
scale_x_continuous(limits = c(150, 1850)) &
scale_y_continuous(limits = c(150, 1850))
p <- ggrastr::rasterize(p, layers = "Point", dpi = 300)
return(p)
})
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
# Dot plot showing DE genes
plots_dotplot <- lapply(1:4, function(i) {
LNG <- LNGList[[i]]
if (i == 1) {
LNG$seurat_clusters <- factor(ann1[LNG$seurat_clusters], levels = ann1)
}
if (i == 2) {
LNG$seurat_clusters <- factor(ann3[LNG$seurat_clusters], levels = ann3)
}
LNG <- Seurat::SetIdent(LNG, value = "seurat_clusters")
p <- DotPlot(LNG, features = unique(de.markers.list[[i]] %>%
dplyr::mutate(gene_count = table(gene)[gene]) %>%
dplyr::filter(gene_count == 1) %>%
dplyr::group_by(cluster) %>%
dplyr::slice_head(n = 4) %>%
dplyr::pull(gene) %>% rev())) +
scale_color_gradientn(colours = scico::scico(n = 11, palette = "bam", direction = -1)) +
scale_y_discrete(position = "left") +
theme(axis.title = element_blank(), axis.text = element_text(size = 14), legend.text = element_text(size = 14),
legend.title = element_text(size = 14), axis.text.x = element_text(angle = 60, hjust = 1, size = 16)) +
coord_flip()
p$guides$colour$title <- "Avg. Expr."
p$guides$size$title <- "Pct. Expr."
return(p)
})
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
This patchwork includes only LNG1 and each plot type is places side by side for a simple comparison between the two protocols.
# Design for patchwork
design <- c(
patchwork::area(1, 1, 1, 1),
patchwork::area(1, 2, 1, 2),
patchwork::area(2, 1, 2, 1),
patchwork::area(2, 2, 2, 2),
patchwork::area(3, 1, 3, 1),
patchwork::area(4, 1, 4, 1),
patchwork::area(3, 2, 3, 2),
patchwork::area(4, 2, 4, 2),
patchwork::area(1, 3, 1, 3),
patchwork::area(1, 4, 1, 4),
patchwork::area(2, 3, 2, 3),
patchwork::area(2, 4, 2, 4),
patchwork::area(3, 3, 5, 3),
patchwork::area(3, 4, 5, 4)
)
# Create patchwork
p <- plots_HE[[1]] +
plots_HE[[2]] +
plots_qc[[1]] +
plots_qc[[2]] +
plots_umap[[1]] +
plots_spatial[[1]] +
plots_umap[[2]] +
plots_spatial[[2]] +
plots_vln[[1]] +
plots_vln[[2]] +
plots_filter[[1]] +
plots_filter[[2]] +
plots_dotplot[[1]] +
plots_dotplot[[2]] +
patchwork::plot_layout(design = design, heights = c(1.5, 2, 2, 2, 0.5))
p
# Export patchwork as pdf
pdf(file = "plots/figure_2_patchwork_side_by_side.pdf", width = 26, height = 20)
print(p)
dev.off()
This patchwork includes only LNG2 and each plot type is places side by side for a simple comparison between the two protocols.
# Design for patchwork
design <- c(
patchwork::area(1, 1, 1, 1),
patchwork::area(1, 2, 1, 2),
patchwork::area(2, 1, 2, 1),
patchwork::area(2, 2, 2, 2),
patchwork::area(3, 1, 3, 1),
patchwork::area(4, 1, 4, 1),
patchwork::area(3, 2, 3, 2),
patchwork::area(4, 2, 4, 2),
patchwork::area(1, 3, 1, 3),
patchwork::area(1, 4, 1, 4),
patchwork::area(2, 3, 2, 3),
patchwork::area(2, 4, 2, 4),
patchwork::area(3, 3, 5, 3),
patchwork::area(3, 4, 5, 4)
)
# Create patchwork
p <- plots_HE[[3]] +
plots_HE[[4]] +
plots_qc[[3]] +
plots_qc[[4]] +
plots_umap[[3]] +
plots_spatial[[3]] +
plots_umap[[4]] +
plots_spatial[[4]] +
plots_vln[[3]] +
plots_vln[[4]] +
plots_filter[[3]] +
plots_filter[[4]] +
plots_dotplot[[3]] +
plots_dotplot[[4]] +
patchwork::plot_layout(design = design, heights = c(1.5, 2, 2, 2, 0.5))
p